Supergene Mediated Differential Expression Analysis

This is an R markdown document illustrating the bioinformatic methods used to generate the results for the manuscript by Arsenault et al. entitled “Simple inheritance, complex regulation: supergene-mediated fire ant queen polymorphism”. If you have any questions about this document, please direct them to Sam Arsenault at samarsenault93@gmail.com or Brendan G. Hunt at brendan.hunt@gmail.com

Load in Relevant Libraries

library(edgeR)
library(UpSetR)
library(readr)
library(magick)
library(gridExtra)
library(karyoploteR)
library(cowplot)
library(ggplot2)
library(topGO)
library(data.table)
library(eulerr)
library(pheatmap)
library(CircStats)
library(RColorBrewer)
library(randtests)
library(readxl)

Load in Relevant Custom Functions

make_polar_DEG <- function(TT_table,radial_lim) {
  ## Things to Add: 
  library(ggplot2)
  #  library(gridExtra)
  library(cowplot)
  internal <- TT_table
  internal$neglogFDR <- -log10(TT_table$FDR)
  internal$angle <- atan2(TT_table$logFC.GenotypeBb,TT_table$logFC.Genotypebb)
  internal$abslogBb <- abs(TT_table$logFC.GenotypeBb)
  internal$abslogbb <- abs(TT_table$logFC.Genotypebb)
  internal$length <- internal[,c("abslogBb","abslogbb")][cbind(seq_len(nrow(internal)), max.col(internal[,c("abslogBb","abslogbb")]))]
  d=data.frame(angle=c(atan2(1,2),atan2(1,1),atan2(2,1),atan2(1,0),atan2(1,-1),atan2(0,-1),atan2(-1,-2),atan2(-1,-1),atan2(-2,-1),atan2(-1,0),atan2(-1,1),atan2(0,1)), 
               category=c("bb>Bb>BB", "bb=Bb>BB", "Bb>bb>BB", "Bb>BB=bb", "Bb>BB>bb", "Bb=BB>bb","BB>Bb>bb","BB>Bb=bb","BB>bb>Bb","BB=bb>Bb","bb>BB>Bb","bb>Bb=BB"))
  gg_scatter <- ggplot() +
    geom_point(data=internal, aes(x=angle, y=neglogFDR),shape=1) + 
    theme_bw() +
    theme(axis.text.x=element_blank()) + 
    coord_polar(theta="x",clip="off") +
    scale_x_continuous(breaks = seq(-pi, pi, pi/3), limits = c(-pi, pi)) +
    scale_y_continuous(breaks = seq(0,radial_lim,5),limits = c(0,radial_lim)) +
    geom_vline(xintercept = c(atan2(1,2),atan2(1,1),atan2(2,1),atan2(1,0),atan2(1,-1),atan2(0,-1),atan2(-1,-2),atan2(-1,-1),atan2(-2,-1),atan2(-1,0),atan2(-1,1),atan2(0,1))) +
    geom_histogram(d=internal,aes(x=angle,y=10*..ncount..),fill="green", alpha=0.3,bins=60) + 
    geom_text(data=d, mapping=aes(x=angle, y=radial_lim, label=category), size=4, angle=0, vjust=0, hjust=0.5)  
  return(gg_scatter)
} ## Make the Polar coordinate Dominance Plot

make_volcano <- function(et,Title) {
  library(ggplot2)
  resFilt <- topTags(et, n=nrow(et$table))
  
  volcanoData <- cbind(resFilt$table$logFC, -log10(resFilt$table$FDR))
  colnames(volcanoData) <- c("logFC", "negLogPval")
  volcanoData <- data.frame(volcanoData)
  volcanoData$Sig <- volcanoData$negLogPval > log10(0.01)*(-1)
  x_bounds = round(max(abs(volcanoData$logFC)))
  y_bound = round(volcanoData$negLogPval)
  # first set up the plot
  volcPlot <- ggplot(volcanoData,aes(logFC,negLogPval))
  # then add the points
  volcPlot <- volcPlot + geom_point(aes(colour = Sig),alpha=0.4, size=2)
  #  volcPlot <- volcPlot + geom_hline(yintercept = log10(0.01)*(-1),size=1) + geom_vline(xintercept = -1,size=1) + geom_vline(xintercept = 1,size=1) + geom_vline(xintercept = 0,size=1) + 
  volcPlot <- volcPlot + geom_hline(yintercept = log10(0.01)*(-1),size=1) + geom_vline(xintercept = 0,size=1) + 
    labs(x = "logFC", y = "-log10(P-value)",title=Title) + 
    theme(legend.position="none", axis.title.x = element_text(face="bold", size=16), axis.text.x  = element_text(size=12), axis.title.y = element_text(face="bold", size=16), axis.text.y  = element_text(size=12)) + 
    scale_x_continuous(limits=c((-1)*x_bounds,x_bounds),breaks=c((-1)*x_bounds,-1,0,1,x_bounds)) + 
    # scale_y_continuous(limits=c(0,y_bound)) + 
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(hjust = 0.5))
  test <- topTags(et,p.value = 0.01,n=100000)
  length(test$table$logFC)
  LO <- sum(test$table$logFC < (-1))
  LI <- sum((test$table$logFC < 0)&(test$table$logFC >= (-1)))
  RI <- sum((test$table$logFC <= 1)&(test$table$logFC > 0))
  RO <- sum(test$table$logFC > 1)
  print(LO)
  print(LI)
  print(RI)
  print(RO)
  return(volcPlot)
  
} ## Create a standardized volcano plot for a given edgeR QLFtest output.

generate_GO_table <- function(data,Name) {
  QLF_TT_all <- topTags(data,p.value = 1,n=Inf)$table
  QLF_TT_sig <- topTags(data,p.value = 0.01,n=Inf)$table
  
  ## Load in custom annotation
  sinv_GO <- readMappings("~/Desktop/Sinv_Analyses/NCBI_genome/NCBI_lg_gene.annot.map", sep = "\t", IDsep=",")
  
  ## Load Specific Gene List 
  myInterestingGenes <- row.names(QLF_TT_sig)
  
  ## Load Full Gene Set
  geneNames <- row.names(QLF_TT_all)
  
  ## Create geneList Structure 
  geneList <- factor(as.integer(geneNames %in% myInterestingGenes))
  names(geneList) <- geneNames
  
  ## Prep for file output
  output_file = Name
  
  ### Run for BP
  
  ## Laod the data into a topGO data object (BP,MF,CC)
  GOData <- new("topGOdata",ontology="BP",allGenes=geneList,description="GO analysis of DEGs with q<0.01", annot = annFUN.gene2GO, gene2GO = sinv_GO)
  
  ## Compute GO term enrichment
  resultClassic <- runTest(GOData, algorithm="classic", statistic="fisher")
  resultElim <- runTest(GOData, algorithm="elim", statistic="fisher")
  resultTopgo <- runTest(GOData, algorithm="weight01", statistic="fisher")
  
  allGO = usedGO(object = GOData) 
  
  allRes <- GenTable(GOData, classicFisher = resultClassic, elimFisher = resultElim, topgoFisher = resultTopgo, orderBy = "topgoFisher", ranksOf = "topgoFisher", topNodes = length(allGO))
  
  aa <- sapply(unname(sapply(allRes$GO.ID, function(x) {genes<-genesInTerm(GOData, x); genes[[1]][sapply(genes[[1]],function(id) id %in% myInterestingGenes)]})),paste0,collapse=",")
  allRes$genes <- aa
  
  res_print <- allRes[(allRes$topgoFisher<=0.05),c(1:5,8)]
  output_file4 = paste(output_file,"TopGO_BP_sig.pdf", sep="_")
  pdf(file = output_file4,width = 10)
  plot.new()
  grid.table(res_print,rows=NULL)
  dev.off()
  
  GOData <- new("topGOdata",ontology="MF",allGenes=geneList,description="GO analysis of DEGs with q<0.01", annot = annFUN.gene2GO, gene2GO = sinv_GO)
  
  ## Compute GO term enrichment
  resultClassic <- runTest(GOData, algorithm="classic", statistic="fisher")
  resultElim <- runTest(GOData, algorithm="elim", statistic="fisher")
  resultTopgo <- runTest(GOData, algorithm="weight01", statistic="fisher")
  
  allGO = usedGO(object = GOData) 
  
  allRes <- GenTable(GOData, classicFisher = resultClassic, elimFisher = resultElim, topgoFisher = resultTopgo, orderBy = "topgoFisher", ranksOf = "topgoFisher", topNodes = length(allGO))
  
  aa <- sapply(unname(sapply(allRes$GO.ID, function(x) {genes<-genesInTerm(GOData, x); genes[[1]][sapply(genes[[1]],function(id) id %in% myInterestingGenes)]})),paste0,collapse=",")
  allRes$genes <- aa
  
  res_print <- allRes[(allRes$topgoFisher<=0.05),c(1:5,8)]
  output_file4 = paste(output_file,"TopGO_MF_sig.pdf", sep="_")
  pdf(file = output_file4,width = 10)
  plot.new()
  grid.table(res_print,rows=NULL)
  dev.off()
  
  GOData <- new("topGOdata",ontology="CC",allGenes=geneList,description="GO analysis of DEGs with q<0.01", annot = annFUN.gene2GO, gene2GO = sinv_GO)
  
  ## Compute GO term enrichment
  resultClassic <- runTest(GOData, algorithm="classic", statistic="fisher")
  resultElim <- runTest(GOData, algorithm="elim", statistic="fisher")
  resultTopgo <- runTest(GOData, algorithm="weight01", statistic="fisher")
  
  allGO = usedGO(object = GOData) 
  
  allRes <- GenTable(GOData, classicFisher = resultClassic, elimFisher = resultElim, topgoFisher = resultTopgo, orderBy = "topgoFisher", ranksOf = "topgoFisher", topNodes = length(allGO))
  
  aa <- sapply(unname(sapply(allRes$GO.ID, function(x) {genes<-genesInTerm(GOData, x); genes[[1]][sapply(genes[[1]],function(id) id %in% myInterestingGenes)]})),paste0,collapse=",")
  allRes$genes <- aa
  
  res_print <- allRes[(allRes$topgoFisher<=0.05),c(1:5,8)]
  output_file4 = paste(output_file,"TopGO_CC_sig.pdf", sep="_")
  pdf(file = output_file4,width = 10)
  plot.new()
  grid.table(res_print,rows=NULL)
  dev.off()
  
  return()
} ## Compute and Print tables of GO term enrichment values

convert_LOC <- function(dataframe) {
  library(rtracklayer)
  library(rentrez)
  Si_gnG_Ensembl <- readGFF(file="~/Downloads/Solenopsis_invicta.Si_gnG.41.gff3")
  Si_gnG_refseq <- readGFF(file="/Users/samarsenault/Desktop/Sinv_Analyses/NCBI_genome/GCF_000188075.1_Si_gnG/GCF_000188075.1_Si_gnG_genomic.gff")
  refseq_translation <- data.frame(Si_gnG_refseq[(Si_gnG_refseq$type=="gene")|(Si_gnG_refseq$type=="pseudogene"),c("ID","Name")])
  Ensembl_translation <- data.frame(Si_gnG_Ensembl[(Si_gnG_Ensembl$type=="gene")|(Si_gnG_Ensembl$type=="pseudogene"),c("ID","description")])
  Ensembl_translation$ID <- gsub("gene:", "", Ensembl_translation$ID)
  merged_translation <- merge(x=refseq_translation,y=Ensembl_translation,by.x="Name",by.y="ID")
  g <- merge(x=dataframe,y=merged_translation,by.y="ID",by.x=0,all.x=TRUE)
  for (i in 1:nrow(g)){
    if (!is.na(g$Name[i])&is.na(g$description[i])){
      search_term <- gsub(pattern = "LOC",replacement = "",x = g$Name[i])
      g$description[i] <- entrez_summary(db="gene",id = search_term)[3]
    }
  }
  return(g)
} ## Add descriptions and proper gene names (LOC) to the data.frame

generate_heatmap_frame <- function(gene_names){
  library(karyoploteR)
  temp <- Sinv_annot[(row.names(Sinv_annot) %in% gene_names),]
  temp.sig<-temp[(grepl("lg",temp$chr)),]
  temp.sig <- temp.sig[complete.cases(temp.sig), ]
  DEG_GR <- makeGRangesFromDataFrame(temp.sig, seqnames.field = "chr",start.field = "start",end.field = "end",keep.extra.columns = TRUE)
  windows <- unlist(tileGenome(stats::setNames(kp$chromosome.lengths, kp$chromosomes),tilewidth = 2e6))
  dens <- countOverlaps(windows, DEG_GR)
  values(windows) <- data.frame(y = dens)
  return(windows)
} ## Takes a list of gene names and makes a heatmap object to pass to karyoploteR

generate_heatmap_frame_SNP <- function(SNP_data){
  library(karyoploteR)
  DEG_GR <- makeGRangesFromDataFrame(SNP_data, seqnames.field = "lg",start.field = "newstart",end.field = "newend",keep.extra.columns = TRUE)
  windows <- unlist(tileGenome(stats::setNames(kp$chromosome.lengths, kp$chromosomes),tilewidth = 500000))
  dens <- countOverlaps(windows, DEG_GR)
  values(windows) <- data.frame(y = dens)
  return(windows)
} ## Takes a list of SNPs and makes a heatmap object to pass to karyoploteR

check_supergene_presence_bias <- function(all_genes_TT_table){
  all_genes_TT_table_merge <- merge(all_genes_TT_table,Sinv_annot_lg,by.x=0,by.y="X4",all.x=TRUE)
  supergene_genes_int <- all_genes_TT_table_merge[((all_genes_TT_table_merge$X1=="lg16")&(all_genes_TT_table_merge$X2>=7311525)&(all_genes_TT_table_merge$X3<=18123987)),c("Row.names")]
  supergene_genes_int <- supergene_genes_int[!is.na(supergene_genes_int)]
  nonsuper_genes_int <- all_genes_TT_table_merge[((grepl(pattern = "lg",x = all_genes_TT_table_merge$X1))&(all_genes_TT_table_merge$Row.names %!in% supergene_genes_int)),c("Row.names")]
  
  in_super_sig <- all_genes_TT_table[((row.names(all_genes_TT_table) %in% supergene_genes_int)&(all_genes_TT_table$FDR<=0.01)),]
  in_super_not <- all_genes_TT_table[((row.names(all_genes_TT_table) %in% supergene_genes_int)&(all_genes_TT_table$FDR>0.01)),]
  out_super_sig <- all_genes_TT_table[((row.names(all_genes_TT_table) %in% nonsuper_genes_int)&(all_genes_TT_table$FDR<=0.01)),]
  out_super_not <- all_genes_TT_table[((row.names(all_genes_TT_table) %in% nonsuper_genes_int)&(all_genes_TT_table$FDR>0.01)),]
  out <- c(nrow(in_super_sig),nrow(in_super_not),nrow(out_super_sig),nrow(out_super_not),nrow(all_genes_TT_table))
  print(out[1:4])
  print(sum(out[1:4])-out[5])
  chi_out <- chisq.test(matrix(out[1:4],nrow=2),correct = FALSE)
  print(chi_out$observed)
  print(chi_out$expected)
  print(chi_out$p.value)
  print(fisher.test(x=matrix(out[1:4],nrow=2),alternative = "two.sided"))
  return()
} ## Takes a full TopTags file (all p-values) and checks for an overrepresentation of significant DEGs in the supergene as opposed to outside of it. 

check_supergene_dir_bias <- function(sig_genes_TT_table){
  sig_genes_TT_table_merge <- merge(sig_genes_TT_table,Sinv_annot_lg,by.x=0,by.y="X4",all.x=TRUE)
  supergene_genes_int <- sig_genes_TT_table_merge[((sig_genes_TT_table_merge$X1=="lg16")&(sig_genes_TT_table_merge$X2>=7311525)&(sig_genes_TT_table_merge$X3<=18123987)),c("Row.names")]
  supergene_genes_int <- supergene_genes_int[!is.na(supergene_genes_int)]
  nonsuper_genes_int <- sig_genes_TT_table_merge[((grepl(pattern = "lg",x = sig_genes_TT_table_merge$X1))&(sig_genes_TT_table_merge$Row.names %!in% supergene_genes_int)),c("Row.names")]
  in_super_up <- sig_genes_TT_table[((row.names(sig_genes_TT_table) %in% supergene_genes_int)&(sig_genes_TT_table$logFC>0)),]
  in_super_down <- sig_genes_TT_table[((row.names(sig_genes_TT_table) %in% supergene_genes_int)&(sig_genes_TT_table$logFC<0)),]
  out_super_up <- sig_genes_TT_table[((row.names(sig_genes_TT_table) %in% nonsuper_genes_int)&(sig_genes_TT_table$logFC>0)),]
  out_super_down <- sig_genes_TT_table[((row.names(sig_genes_TT_table) %in% nonsuper_genes_int)&(sig_genes_TT_table$logFC<0)),]
  out <- c(nrow(in_super_up),nrow(in_super_down),nrow(out_super_up),nrow(out_super_down),nrow(sig_genes_TT_table))
  print(out[1:4])
  print(sum(out[1:4])-out[5])
  chi_out <- chisq.test(matrix(out[1:4],nrow=2),correct = FALSE)
  print(chi_out$observed)
  print(chi_out$expected)
  print(chi_out$p.value)
  print(fisher.test(x=matrix(out[1:4],nrow=2),alternative = "two.sided"))
  return()
} ## Takes a table of significant DEGs and assesses whether or not there is a directional bias of differential expression within versus outside of the supergene. 

'%!in%' <- function(x,y)!('%in%'(x,y)) ## Quality of life function

General Information Load

## Load in the gene lengths computed from the annotation for RPKM calculation
len_file=read.delim(file="/Users/samarsenault/Desktop/Sinv_Analyses/Sinv_NCBI_counts/Genelength.tsv",sep="\t")
head(len_file)
##   GeneID Length
## 1  gene0   1839
## 2  gene1    894
## 3  gene2   2253
## 4  gene3    870
## 5  gene4     76
## 6  gene5    972
## Load in the counts file generated using the 2-pass method from STAR
x <- read.delim("/Users/samarsenault/Desktop/Sinv_Analyses/Sinv_NCBI_counts/NCBI_All_counts.tsv",row.names=1,sep="\t")
head(x)
##       lib_104AB lib_104AO lib_104DB lib_104GB lib_104GO lib_107AB
## gene0         0        27         0         9        27         0
## gene1         0         0         0         0         0         0
## gene2         1        16         0         0         7         0
## gene3     23142      2727     21787     22937      2385     10573
## gene4         0         0         0         0         0         0
## gene5        14         4         0         8         4         4
##       lib_107AO lib_107EB lib_107EO lib_107GB lib_107GO lib_15AB lib_15AO
## gene0        24         0         9         0        10        0       10
## gene1         0         0         0         0         0        0        0
## gene2        24        29        10        88         8       41        4
## gene3      3340     15382      4155     17307      4557     6960     7256
## gene4         0         0         0         0         0        0        0
## gene5         3        15         1        20         0       12        0
##       lib_15DB lib_15DO lib_15GB lib_15GO lib_16CB lib_16CO lib_16DB
## gene0        0       20        0        0        0        0        0
## gene1        0        0        0        0        0        0        0
## gene2        5       14        0       16       27       25        0
## gene3     5436     3270    15016     3305     1937     2446     7587
## gene4        0        0        0        0        0        0        0
## gene5        2        1        2        1       34        0       25
##       lib_16DO lib_16GB lib_16GO lib_19AB lib_19AO lib_19DB lib_19DO
## gene0       21        0        3        0        8        0        0
## gene1        0        0        0        0        0        0        0
## gene2        9        0       11       20        3       38        8
## gene3     2199     6104     3032    13182     3015    12381     2358
## gene4        0        0        0        0        0        0        0
## gene5        0        6        0       25        1       82        0
##       lib_19GB lib_19GO lib_1EB lib_1EO lib_207AB lib_207AO lib_209AB
## gene0        0        6       2      57        42         5         0
## gene1        0        0       0       0         0         0         0
## gene2        3       12       5      15        71         2        41
## gene3    13280     7304     904    1416     11555      3013     16333
## gene4        0        0       0       0         0         0         0
## gene5       25        0      48       3        73         0        24
##       lib_209AO lib_20AB lib_20AO lib_20DB lib_20DO lib_20IB lib_20IO
## gene0         0        5       13        0        2        0       14
## gene1         0        0        0        0        0        0        0
## gene2         9       86       11       24       11        2       11
## gene3      4266     9281     3669    24916     4310    17208     4228
## gene4         0        0        0        0        0        0        0
## gene5         0       35        0       19        0       23        0
##       lib_222AB lib_222AO lib_232AB lib_232AO lib_233AB lib_233AO
## gene0         0         5         0         4         0         4
## gene1         0         0         0         0         0         0
## gene2        26         8        44        10         0         4
## gene3     31449      3370     10125      3138     12777      3220
## gene4         0         0         0         0         0         0
## gene5        13         0         9         0        36         0
##       lib_235AB lib_235AO lib_239AB lib_239AO lib_240AB lib_240AO lib_30AB
## gene0         0         0         0         4         0         0        2
## gene1         0         0         0         0         0         0        0
## gene2        24         5         0         9         0         5      267
## gene3     12547      5417     25319      4123     17585      3988     5901
## gene4         0         0         0         0         0         0        0
## gene5        20         0         1         0        15         0        6
##       lib_30AO lib_30EB lib_30EO lib_30GB lib_30GO lib_5AB lib_5AO lib_5GB
## gene0       13        0        3        0        1       0       9      10
## gene1        0        0        0        0        0       0       0       0
## gene2        2        5        8       89        8      44      11      15
## gene3     2905     7458     4653    11744     2184    4405    3841    5980
## gene4        0        0        0        0        0       0       0       0
## gene5        0       11        0       12        0      35       0      18
##       lib_5GO
## gene0       8
## gene1       0
## gene2      10
## gene3    2152
## gene4       0
## gene5       0
## Load in the model matrix containing information formatted for both pairiwse and GLM comparisons
adv_group <- read.delim("/Users/samarsenault/Desktop/Sinv_Analyses/Sinv_model_matrix5.tsv",row.names = 1,sep = "\t")
head(adv_group)
##       Individual_ID Colony_ID Tissue Social.Form Genotype
## 104AB          104A       104  Brain    Polygyne       Bb
## 104AO          104A       104  Ovary    Polygyne       Bb
## 104DB          104D       104  Brain    Polygyne       BB
## 104GB          104G       104  Brain    Polygyne       bb
## 104GO          104G       104  Ovary    Polygyne       bb
## 107AB          107A       107  Brain    Polygyne       Bb
##                  Alt_ID
## 104AB Brain.Polygyne.Bb
## 104AO Ovary.Polygyne.Bb
## 104DB Brain.Polygyne.BB
## 104GB Brain.Polygyne.bb
## 104GO Ovary.Polygyne.bb
## 107AB Brain.Polygyne.Bb
## Load in the annotation information from a bed file
Sinv_annot <- read_delim("~/Desktop/Sinv_Analyses/NCBI_genome/Genetic_Mapping/NCBI_lg_gene_v3.bed", 
                         "\t", escape_double = FALSE, col_names = FALSE, 
                         trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_double(),
##   X3 = col_double(),
##   X4 = col_character()
## )
row.names(Sinv_annot) <- Sinv_annot$X4 ## Rename the rows of the this variable with gene names
## Warning: Setting row names on a tibble is deprecated.
colnames(Sinv_annot) <- c("chr","start","end","gene") ## rename the columns with better description
head(Sinv_annot)
## # A tibble: 6 x 4
##   chr         start   end gene     
##   <chr>       <dbl> <dbl> <chr>    
## 1 NC_014672.1 10229 11174 gene15723
## 2 NC_014672.1 14342 15323 gene15724
## 3 NC_014672.1  4157  4507 gene15717
## 4 NC_014672.1  4929  6596 gene15718
## 5 NC_014672.1  8466  8996 gene15721
## 6 NC_014672.1  9021 10142 gene15722
## Load in the linkage mapped genome annotations
Sinv_annot_lg <- read_delim("~/Desktop/Sinv_Analyses/NCBI_genome/Genetic_Mapping/NCBI_lg_gene_v4.bed", 
                         "\t", escape_double = FALSE, col_names = FALSE, 
                         trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_double(),
##   X3 = col_double(),
##   X4 = col_character()
## )

Pairwise Comparisons

y_pw <- DGEList(x,group = adv_group$Alt_ID)

y_pw$samples
##                       group lib.size norm.factors
## lib_104AB Brain.Polygyne.Bb 23839271            1
## lib_104AO Ovary.Polygyne.Bb 21048826            1
## lib_104DB Brain.Polygyne.BB 23454386            1
## lib_104GB Brain.Polygyne.bb 24389394            1
## lib_104GO Ovary.Polygyne.bb 20495668            1
## lib_107AB Brain.Polygyne.Bb 20141694            1
## lib_107AO Ovary.Polygyne.Bb 17197423            1
## lib_107EB Brain.Polygyne.BB 21829149            1
## lib_107EO Ovary.Polygyne.BB 19294902            1
## lib_107GB Brain.Polygyne.bb 25168093            1
## lib_107GO Ovary.Polygyne.bb 16074944            1
## lib_15AB  Brain.Polygyne.Bb 14960249            1
## lib_15AO  Ovary.Polygyne.Bb 16752710            1
## lib_15DB  Brain.Polygyne.BB 13227223            1
## lib_15DO  Ovary.Polygyne.BB 19369746            1
## lib_15GB  Brain.Polygyne.bb 15342322            1
## lib_15GO  Ovary.Polygyne.bb 15462774            1
## lib_16CB  Brain.Polygyne.Bb 10874244            1
## lib_16CO  Ovary.Polygyne.Bb 18167411            1
## lib_16DB  Brain.Polygyne.BB 18118617            1
## lib_16DO  Ovary.Polygyne.BB 15154985            1
## lib_16GB  Brain.Polygyne.bb 14252480            1
## lib_16GO  Ovary.Polygyne.bb 11697881            1
## lib_19AB  Brain.Polygyne.Bb 21454629            1
## lib_19AO  Ovary.Polygyne.Bb 14055961            1
## lib_19DB  Brain.Polygyne.BB 22242852            1
## lib_19DO  Ovary.Polygyne.BB 12250091            1
## lib_19GB  Brain.Polygyne.bb 22026200            1
## lib_19GO  Ovary.Polygyne.bb 18419244            1
## lib_1EB   Brain.Polygyne.BB  9465077            1
## lib_1EO   Ovary.Polygyne.BB 15489624            1
## lib_207AB Brain.Monogyne.BB 23545677            1
## lib_207AO Ovary.Monogyne.BB 13189348            1
## lib_209AB Brain.Monogyne.BB 25094494            1
## lib_209AO Ovary.Monogyne.BB 18649952            1
## lib_20AB  Brain.Polygyne.Bb 20937228            1
## lib_20AO  Ovary.Polygyne.Bb 19166278            1
## lib_20DB  Brain.Polygyne.BB 21474982            1
## lib_20DO  Ovary.Polygyne.BB 20286421            1
## lib_20IB  Brain.Polygyne.bb 23393220            1
## lib_20IO  Ovary.Polygyne.bb 19737048            1
## lib_222AB Brain.Monogyne.BB 26950035            1
## lib_222AO Ovary.Monogyne.BB 18315290            1
## lib_232AB Brain.Monogyne.BB 15462514            1
## lib_232AO Ovary.Monogyne.BB 17487369            1
## lib_233AB Brain.Monogyne.BB 18101509            1
## lib_233AO Ovary.Monogyne.BB 16506089            1
## lib_235AB Brain.Monogyne.BB 17354499            1
## lib_235AO Ovary.Monogyne.BB 22630544            1
## lib_239AB Brain.Monogyne.BB 23630798            1
## lib_239AO Ovary.Monogyne.BB 19564509            1
## lib_240AB Brain.Monogyne.BB 19079343            1
## lib_240AO Ovary.Monogyne.BB 18214783            1
## lib_30AB  Brain.Polygyne.Bb 18465236            1
## lib_30AO  Ovary.Polygyne.Bb 13595740            1
## lib_30EB  Brain.Polygyne.BB 13622038            1
## lib_30EO  Ovary.Polygyne.BB 18007543            1
## lib_30GB  Brain.Polygyne.bb 16763253            1
## lib_30GO  Ovary.Polygyne.bb 10097662            1
## lib_5AB   Brain.Polygyne.Bb 13352005            1
## lib_5AO   Ovary.Polygyne.Bb 11809254            1
## lib_5GB   Brain.Polygyne.bb 16318389            1
## lib_5GO   Ovary.Polygyne.bb 11915595            1
## Filter out low count genes
keep <- rowSums(cpm(y_pw)>10/9) >= 8 ## NEED TO EDIT!!!
len_file <- len_file[ as.vector(keep) , ]
y_pw <- y_pw[keep, , keep.lib.sizes=FALSE]

## Normalize samples by library size
y_pw <- calcNormFactors(y_pw)
Background.genes <- row.names(y_pw$counts)

## Calculate RPKM values
adv_group$FullID <- paste(adv_group$Individual_ID,adv_group$Alt_ID,sep = ".")

## Begin Computing Differential Expression
design_pw <- model.matrix(~0+adv_group$Alt_ID,data=y_pw$samples)
colnames(design_pw) <- levels(y_pw$samples$group)

y_pw <- estimateDisp(y_pw,design_pw)
y_pw <- estimateGLMCommonDisp(y_pw, design_pw)
y_pw <- estimateGLMTrendedDisp(y_pw, design_pw)
y_pw <- estimateGLMTagwiseDisp(y_pw, design_pw)

## QLM Fit and DE analysis
fit_pw <- glmQLFit(y_pw,design_pw)

my.contrasts <- makeContrasts(Brain_M_v_P_BB=Brain.Monogyne.BB-Brain.Polygyne.BB, 
                              Brain_BB_v_Bb=Brain.Polygyne.BB-Brain.Polygyne.Bb, 
                              Brain_Bb_v_bb=Brain.Polygyne.Bb-Brain.Polygyne.bb, 
                              Brain_mBB_v_pBb=Brain.Monogyne.BB-Brain.Polygyne.Bb, 
                              Brain_mBB_v_pbb=Brain.Monogyne.BB-Brain.Polygyne.bb, 
                              Ovary_M_v_P_BB=Ovary.Monogyne.BB-Ovary.Polygyne.BB, 
                              Ovary_BB_v_Bb=Ovary.Polygyne.BB-Ovary.Polygyne.Bb, 
                              Ovary_Bb_v_bb=Ovary.Polygyne.Bb-Ovary.Polygyne.bb, 
                              Ovary_mBB_v_pBb=Ovary.Monogyne.BB-Ovary.Polygyne.Bb, 
                              Ovary_mBB_v_pbb=Ovary.Monogyne.BB-Ovary.Polygyne.bb, 
                              Brain_v_Ovary_pBB=Brain.Polygyne.BB-Ovary.Polygyne.BB,
                              Brain_v_Ovary_mBB=Brain.Monogyne.BB-Ovary.Monogyne.BB,
                              Brain_v_Ovary_pBb=Brain.Polygyne.Bb-Ovary.Polygyne.Bb,
                              Brain_v_Ovary_pbb=Brain.Polygyne.bb-Ovary.Polygyne.bb,
                              Ovary_BB_v_bb=Ovary.Polygyne.BB-Ovary.Polygyne.bb, 
                              Brain_BB_v_bb=Brain.Polygyne.BB-Brain.Polygyne.bb, 
                              levels=design_pw)

pw.qlf.Brain_M_v_P_BB <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_M_v_P_BB"])
pw.qlf.Brain_M_v_P_BB.TT <- topTags(pw.qlf.Brain_M_v_P_BB,p.value = 1,n=100000)
pw.qlf.Brain_M_v_P_BB.TT.sig <- topTags(pw.qlf.Brain_M_v_P_BB,p.value = 0.01,n=100000)$table
pw.qlf.Brain_M_v_P_BB.TT.names <- row.names(pw.qlf.Brain_M_v_P_BB.TT$table)

pw.qlf.Ovary_M_v_P_BB <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_M_v_P_BB"])
pw.qlf.Ovary_M_v_P_BB.TT <- topTags(pw.qlf.Ovary_M_v_P_BB,p.value = 1,n=100000)
pw.qlf.Ovary_M_v_P_BB.TT.sig <- topTags(pw.qlf.Ovary_M_v_P_BB,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_M_v_P_BB.TT.names <- row.names(pw.qlf.Ovary_M_v_P_BB.TT$table)

pw.qlf.Brain_BB_v_Bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_BB_v_Bb"])
pw.qlf.Brain_BB_v_Bb.TT <- topTags(pw.qlf.Brain_BB_v_Bb,p.value = 1,n=100000)
pw.qlf.Brain_BB_v_Bb.TT.sig <- topTags(pw.qlf.Brain_BB_v_Bb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_BB_v_Bb.TT.names <- row.names(pw.qlf.Brain_BB_v_Bb.TT$table)

pw.qlf.Ovary_BB_v_Bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_BB_v_Bb"])
pw.qlf.Ovary_BB_v_Bb.TT <- topTags(pw.qlf.Ovary_BB_v_Bb,p.value = 1,n=100000)
pw.qlf.Ovary_BB_v_Bb.TT.sig <- topTags(pw.qlf.Ovary_BB_v_Bb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_BB_v_Bb.TT.names <- row.names(pw.qlf.Ovary_BB_v_Bb.TT$table)

pw.qlf.Brain_Bb_v_bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_Bb_v_bb"])
pw.qlf.Brain_Bb_v_bb.TT <- topTags(pw.qlf.Brain_Bb_v_bb,p.value = 1,n=100000)
pw.qlf.Brain_Bb_v_bb.TT.sig <- topTags(pw.qlf.Brain_Bb_v_bb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_Bb_v_bb.TT.names <- row.names(pw.qlf.Brain_Bb_v_bb.TT$table)

pw.qlf.Ovary_Bb_v_bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_Bb_v_bb"])
pw.qlf.Ovary_Bb_v_bb.TT <- topTags(pw.qlf.Ovary_Bb_v_bb,p.value = 1,n=100000)
pw.qlf.Ovary_Bb_v_bb.TT.sig <- topTags(pw.qlf.Ovary_Bb_v_bb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_Bb_v_bb.TT.names <- row.names(pw.qlf.Ovary_Bb_v_bb.TT$table)

pw.qlf.Brain_v_Ovary_pBB <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_v_Ovary_pBB"])
pw.qlf.Brain_v_Ovary_pBB.TT <- topTags(pw.qlf.Brain_v_Ovary_pBB,p.value = 1,n=100000)
pw.qlf.Brain_v_Ovary_pBB.TT.sig <- topTags(pw.qlf.Brain_v_Ovary_pBB,p.value = 0.01,n=100000)$table
pw.qlf.Brain_v_Ovary_pBB.TT.names <- row.names(pw.qlf.Brain_v_Ovary_pBB.TT$table)

pw.qlf.Brain_v_Ovary_mBB <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_v_Ovary_mBB"])
pw.qlf.Brain_v_Ovary_mBB.TT <- topTags(pw.qlf.Brain_v_Ovary_mBB,p.value = 1,n=100000)
pw.qlf.Brain_v_Ovary_mBB.TT.sig <- topTags(pw.qlf.Brain_v_Ovary_mBB,p.value = 0.01,n=100000)$table
pw.qlf.Brain_v_Ovary_mBB.TT.names <- row.names(pw.qlf.Brain_v_Ovary_mBB.TT$table)

pw.qlf.Brain_v_Ovary_pBb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_v_Ovary_pBb"])
pw.qlf.Brain_v_Ovary_pBb.TT <- topTags(pw.qlf.Brain_v_Ovary_pBb,p.value = 1,n=100000)
pw.qlf.Brain_v_Ovary_pBb.TT.sig <- topTags(pw.qlf.Brain_v_Ovary_pBb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_v_Ovary_pBb.TT.names <- row.names(pw.qlf.Brain_v_Ovary_pBb.TT$table)

pw.qlf.Brain_v_Ovary_pbb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_v_Ovary_pbb"])
pw.qlf.Brain_v_Ovary_pbb.TT <- topTags(pw.qlf.Brain_v_Ovary_pbb,p.value = 1,n=100000)
pw.qlf.Brain_v_Ovary_pbb.TT.sig <- topTags(pw.qlf.Brain_v_Ovary_pbb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_v_Ovary_pbb.TT.names <- row.names(pw.qlf.Brain_v_Ovary_pbb.TT$table)

pw.qlf.Brain_BB_v_bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_BB_v_bb"])
pw.qlf.Brain_BB_v_bb.TT <- topTags(pw.qlf.Brain_BB_v_bb,p.value = 1,n=100000)
pw.qlf.Brain_BB_v_bb.TT.sig <- topTags(pw.qlf.Brain_BB_v_bb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_BB_v_bb.TT.names <- row.names(pw.qlf.Brain_BB_v_bb.TT$table)

pw.qlf.Ovary_BB_v_bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_BB_v_bb"])
pw.qlf.Ovary_BB_v_bb.TT <- topTags(pw.qlf.Ovary_BB_v_bb,p.value = 1,n=100000)
pw.qlf.Ovary_BB_v_bb.TT.sig <- topTags(pw.qlf.Ovary_BB_v_bb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_BB_v_bb.TT.names <- row.names(pw.qlf.Ovary_BB_v_bb.TT$table)

pw.qlf.Ovary_mBB_v_pBb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_mBB_v_pBb"])
pw.qlf.Ovary_mBB_v_pBb.TT <- topTags(pw.qlf.Ovary_mBB_v_pBb,p.value = 1,n=100000)
pw.qlf.Ovary_mBB_v_pBb.TT.sig <- topTags(pw.qlf.Ovary_mBB_v_pBb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_mBB_v_pBb.TT.names <- row.names(pw.qlf.Ovary_mBB_v_pBb.TT$table)

pw.qlf.Ovary_mBB_v_pbb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_mBB_v_pbb"])
pw.qlf.Ovary_mBB_v_pbb.TT <- topTags(pw.qlf.Ovary_mBB_v_pbb,p.value = 1,n=100000)
pw.qlf.Ovary_mBB_v_pbb.TT.sig <- topTags(pw.qlf.Ovary_mBB_v_pbb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_mBB_v_pbb.TT.names <- row.names(pw.qlf.Ovary_mBB_v_pbb.TT$table)

pw.qlf.Brain_mBB_v_pBb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_mBB_v_pBb"])
pw.qlf.Brain_mBB_v_pBb.TT <- topTags(pw.qlf.Brain_mBB_v_pBb,p.value = 1,n=100000)
pw.qlf.Brain_mBB_v_pBb.TT.sig <- topTags(pw.qlf.Brain_mBB_v_pBb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_mBB_v_pBb.TT.names <- row.names(pw.qlf.Brain_mBB_v_pBb.TT$table)

pw.qlf.Brain_mBB_v_pbb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_mBB_v_pbb"])
pw.qlf.Brain_mBB_v_pbb.TT <- topTags(pw.qlf.Brain_mBB_v_pbb,p.value = 1,n=100000)
pw.qlf.Brain_mBB_v_pbb.TT.sig <- topTags(pw.qlf.Brain_mBB_v_pbb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_mBB_v_pbb.TT.names <- row.names(pw.qlf.Brain_mBB_v_pbb.TT$table)


## Generate Volcano Plots for the Pairiwse Comparisons
# Pairwise, genotype-specific comparisons (Figure S1,A-D)
make_volcano(pw.qlf.Brain_BB_v_Bb,"Brain SB/SB vs. SB/Sb")
## [1] 36
## [1] 1
## [1] 0
## [1] 5

make_volcano(pw.qlf.Brain_BB_v_bb,"Brain SB/SB vs. Sb/Sb")
## [1] 68
## [1] 6
## [1] 3
## [1] 67
## Warning: Removed 3 rows containing missing values (geom_point).

make_volcano(pw.qlf.Ovary_BB_v_Bb,"Ovary SB/SB vs. SB/Sb")
## [1] 40
## [1] 2
## [1] 1
## [1] 0
## Warning: Removed 1 rows containing missing values (geom_point).

make_volcano(pw.qlf.Ovary_BB_v_bb,"Ovary SB/SB vs. Sb/Sb")
## [1] 179
## [1] 20
## [1] 13
## [1] 28
## Warning: Removed 1 rows containing missing values (geom_point).

# Pairwise, social form comparisons (Embedded within Figure 3B,C)
make_volcano(pw.qlf.Brain_M_v_P_BB,"Brain Monogyne SB/SB vs. Polygyne SB/SB")
## [1] 13
## [1] 3
## [1] 1
## [1] 5

make_volcano(pw.qlf.Ovary_M_v_P_BB,"Ovary Monogyne SB/SB vs. Polygyne SB/SB")
## [1] 0
## [1] 0
## [1] 0
## [1] 0

# Pairwise, combinatorial-effect comparisons (Embedded within Figure 3B,C)
make_volcano(pw.qlf.Brain_mBB_v_pBb,"Brain Monogyne SB/SB vs. Polygyne SB/Sb")
## [1] 30
## [1] 0
## [1] 2
## [1] 4
## Warning: Removed 1 rows containing missing values (geom_point).

make_volcano(pw.qlf.Ovary_mBB_v_pBb,"Ovary Monogyne SB/SB vs. Polygyne SB/Sb")
## [1] 175
## [1] 14
## [1] 9
## [1] 7
## Warning: Removed 1 rows containing missing values (geom_point).

## Generate UpSetR plot of Pairwise comparisons (Figure 2C)
PW_set <- list(Brain_pBB_pBb=row.names(pw.qlf.Brain_BB_v_Bb.TT.sig),
                 Brain_pBB_pbb=row.names(pw.qlf.Brain_BB_v_bb.TT.sig),
                 Ovary_pBB_pBb=row.names(pw.qlf.Ovary_BB_v_Bb.TT.sig),
                 Ovary_pBB_pbb=row.names(pw.qlf.Ovary_BB_v_bb.TT.sig)
                 )
upset(fromList(PW_set),nsets=4,order.by = "freq")

## Generate Social Form Euler Plots (Figure 3B,C)
Tissue_set <- list(
  Brain_mBBvpBB=row.names(pw.qlf.Brain_M_v_P_BB.TT.sig),
  Ovary_mBBvpBB=row.names(pw.qlf.Ovary_M_v_P_BB.TT.sig),
  Brain_pBBvpBb=row.names(pw.qlf.Brain_BB_v_Bb.TT.sig),
  Ovary_pBBvpBb=row.names(pw.qlf.Ovary_BB_v_Bb.TT.sig),
  Brain_mBBvpBb=row.names(pw.qlf.Brain_mBB_v_pBb.TT.sig),
  Ovary_mBBvpBb=row.names(pw.qlf.Ovary_mBB_v_pBb.TT.sig)
)

brain_euler <- plot(euler(Tissue_set[c(1,3,5)], shape = "ellipse"), quantities = TRUE)
brain_euler

ovary_euler <- plot(euler(Tissue_set[c(2,4,6)], shape = "ellipse"), quantities = TRUE)
## Warning in colSums(id & !empty) == 0 | merged_sets: longer object length is
## not a multiple of shorter object length
ovary_euler

Generalized Linear Model Analysis

## Format model matrix
Colony_ID <- factor(adv_group$Colony_ID)
Tissue <- factor(adv_group$Tissue)
Social.Form <- factor(adv_group$Social.Form)
Genotype_temp <- factor(adv_group$Genotype)
Genotype2 <- relevel(Genotype_temp, ref = "BB")

## Describe the Experiment Design and define Contrasts
glm_design <- model.matrix(~0 + Colony_ID + Social.Form + Tissue * Genotype2, 
    data = adv_group)

colnames(glm_design) <- c("Colony", "Monogyne", "Polygyne", "TissueOvary", "Genotypebb", 
    "GenotypeBb", "Tissuebb", "TissueBb")

y_glm <- DGEList(x, samples = glm_design)

## Filter out low count genes
y_glm <- y_glm[(rowSums(cpm(y_glm) > 10/9) >= 8), , keep.lib.sizes = FALSE]  ## Based on the smallest number of aligned reads in our libraries as per edgeR documentation

### Normalize samples by library size
y_glm <- calcNormFactors(y_glm)

Background.genes <- row.names(y_glm$counts)

## Begin Computing Differential Expression
y_glm <- estimateDisp(y_glm, glm_design)
y_glm <- estimateGLMCommonDisp(y_glm, glm_design)
y_glm <- estimateGLMTrendedDisp(y_glm, glm_design)
y_glm <- estimateGLMTagwiseDisp(y_glm, glm_design)

## QLM Fit and DE analysis
fit_glm <- glmQLFit(y_glm, glm_design)

## Compute Differential expression based on number of supergene alleles DE
## Analysis for Paper
glm.QLF.BBvBb <- glmQLFTest(fit_glm, coef = "GenotypeBb")
glm.QLF.BBvBb_TT <- topTags(glm.QLF.BBvBb, n = Inf)
glm.QLF.BBvBb_TT_sig <- topTags(glm.QLF.BBvBb_TT, p.value = 0.01, n = Inf)$table

glm.QLF.BBvbb <- glmQLFTest(fit_glm, coef = "Genotypebb")
glm.QLF.BBvbb_TT <- topTags(glm.QLF.BBvbb, n = Inf)
glm.QLF.BBvbb_TT_sig <- topTags(glm.QLF.BBvbb_TT, p.value = 0.01, n = Inf)$table

glm.QLF.all <- glmQLFTest(fit_glm, coef = 5:6)
glm.QLF.all_TT <- topTags(glm.QLF.all, n = Inf)
glm.QLF.all_TT_sig <- topTags(glm.QLF.all_TT, p.value = 0.01, n = Inf)$table


## Compute differential expression based on social form
con.SF <- makeContrasts(Monogyne - Polygyne, levels = glm_design)
glm.QLF.SF <- glmQLFTest(fit_glm, contrast = con.SF)
glm.QLF.SF_TT <- topTags(glm.QLF.SF, n = Inf)
glm.QLF.SF_TT_sig <- topTags(glm.QLF.SF_TT, p.value = 0.01, n = Inf)$table

## Visualizations ## Note here that due to the way the contrasts are set up,
## the directionality of the volcano plots is flipped. This was rectified in
## the manuscript.  BB vs. Bb Volcano Plot (Figure 2A)
make_volcano(glm.QLF.BBvBb, "GLM BB vs. Bb")
## [1] 1
## [1] 1
## [1] 1
## [1] 47

# BB vs. bb Volcano Plot (Figure 2B)
make_volcano(glm.QLF.BBvbb, "GLM BB vs. bb")
## [1] 35
## [1] 2
## [1] 9
## [1] 87

# Monogyne vs Polygyne Volcano Plot (Figure 3A)
make_volcano(glm.QLF.SF, "GLM Monogyne vs. Polygyne")
## [1] 0
## [1] 0
## [1] 0
## [1] 0

GO Term Enrichment Analysis on pairwise and glm differential expression results

## GO Term Enrichment Analysis
generate_GO_table(pw.qlf.Brain_BB_v_Bb, "Brain_BBvBL")
## NULL
generate_GO_table(pw.qlf.Brain_BB_v_bb, "Brain_BBvLL")
## NULL
generate_GO_table(pw.qlf.Ovary_BB_v_Bb, "Ovary_BBvBL")
## NULL
generate_GO_table(pw.qlf.Ovary_BB_v_bb, "Ovary_BBvLL")
## NULL
generate_GO_table(glm.QLF.BBvBb, "GLM_BBvBL")
## NULL
generate_GO_table(glm.QLF.BBvbb, "GLM_BBvLL")
## NULL

Generate Genome-wide DE Figure

## Add Tags for Candidate Genes (These genes were derived from overlapping the variouw differential expression analyses)
load(file="~/Desktop/Sinv_Analyses/Candidate_gene_names.RData")
Candidate_Genes <- c(Naming_Frame_sub$Row.names,"gene1476")

temp.sig1 <- Sinv_annot_lg[((Sinv_annot_lg$X4 %in% Candidate_Genes)&(grepl("lg",Sinv_annot_lg$X1))),]
temp.sig1 <- temp.sig1[complete.cases(temp.sig1), ]
row.names(temp.sig1) <- temp.sig1$X4
## Warning: Setting row names on a tibble is deprecated.
temp.sig1 <- convert_LOC(temp.sig1)
## Warning: package 'rtracklayer' was built under R version 3.5.2
## Warning: package 'rentrez' was built under R version 3.5.2
## Appended gene descriptions that were not included due to an upstream error
temp.sig1[5,"description"] <- "dopamine receptor 1 "
temp.sig1[10,"description"] <- "pheromone-binding protein Gp-9-like"
temp.sig1[11,"description"] <- "NADH dehydrogenase [ubiquinone] 1 alpha subcomplex subunit 9, mitochondrial-like"
temp.sig1[14,"description"] <- "putative nuclease HARBI1"

DEG_GR1 <- makeGRangesFromDataFrame(temp.sig1, seqnames.field = "X1",start.field = "X2",end.field = "X3",keep.extra.columns = TRUE)
DEG_GR1$labels <- DEG_GR1$description

Chr_sizes_GR <- toGRanges("~/Desktop/Sinv_Analyses/lg_size.bed")
gene_bed_GR <- toGRanges("~/Desktop/Sinv_Analyses/NCBI_genome/Genetic_Mapping/NCBI_lgonly_gene_v4.bed")
supergene_GR <- GRanges(seqnames = "lg16",ranges=IRanges(7311525,18123987)) ## Need to check points!!!

pp <- getDefaultPlotParams(plot.type = 4)
pp$data1inmargin <- 5
pp$rightmargin <- 0.2
kp <- plotKaryotype(genome = Chr_sizes_GR, plot.type=4, ideogram.plotter = NULL, labels.plotter = NULL,plot.params = pp)
kpAddCytobandsAsLine(kp,lwd = 30)
kpAddChromosomeNames(kp, srt=90)
core_bb_windows <- generate_heatmap_frame(row.names(glm.QLF.BBvbb_TT_sig))
core_Bb_windows <- generate_heatmap_frame(row.names(glm.QLF.BBvBb_TT_sig))
Brain_Bb_windows <- generate_heatmap_frame(row.names(pw.qlf.Brain_BB_v_Bb.TT.sig))
Brain_bb_windows <- generate_heatmap_frame(row.names(pw.qlf.Brain_BB_v_bb.TT.sig))
Ovary_Bb_windows <- generate_heatmap_frame(row.names(pw.qlf.Ovary_BB_v_Bb.TT.sig))
Ovary_bb_windows <- generate_heatmap_frame(row.names(pw.qlf.Ovary_BB_v_bb.TT.sig))

## Add DEG density Heatmap
colfunc <- colorRampPalette(c("white", "blue"))
kpHeatmap(kp,data=core_bb_windows,r0=0.11,r1=0.15,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=core_Bb_windows,r0=0.16,r1=0.2,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=Brain_bb_windows,r0=0.21,r1=0.25,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=Brain_Bb_windows,r0=0.26,r1=0.3,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=Ovary_bb_windows,r0=0.31,r1=0.35,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=Ovary_Bb_windows,r0=0.36,r1=0.4,colors = colfunc(5),ymin=0,ymax=20)
## Add supergene Marker
kpPlotRegions(kp,data = supergene_GR,col=adjustcolor("darkgreen",alpha.f = 0.5), r0=0.05,r1=0.1)
kpPlotMarkers(kp, DEG_GR1,labels = DEG_GR1$description,r0 = 0.41,r1=0.5,ignore.chromosome.ends = TRUE,max.iter = 10000,label.dist = -0.001,clipping = TRUE,marker.parts = c(0.1,0.8,0.1))

## Determine if there is a statistical overrepresentation of DEGs within the supergene and if it is directional. 
## Check for overlap in supergene and DEGs for each pairwise comparison
check_supergene_presence_bias(glm.QLF.BBvBb_TT$table)
## [1]   17  354   17 8139
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
##      [,1] [,2]
## [1,]   17   17
## [2,]  354 8139
##            [,1]      [,2]
## [1,]   1.479301   32.5207
## [2,] 369.520699 8123.4793
## [1] 4.633262e-39
## 
##  Fisher's Exact Test for Count Data
## 
## data:  matrix(out[1:4], nrow = 2)
## p-value = 5.933e-15
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  10.91879 48.26582
## sample estimates:
## odds ratio 
##   22.96189
## NULL
check_supergene_presence_bias(glm.QLF.BBvbb_TT$table)
## [1]   43  328   38 8118
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
##      [,1] [,2]
## [1,]   43   38
## [2,]  328 8118
##            [,1]       [,2]
## [1,]   3.524217   77.47578
## [2,] 367.475783 8078.52422
## [1] 1.651845e-103
## 
##  Fisher's Exact Test for Count Data
## 
## data:  matrix(out[1:4], nrow = 2)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  17.39221 45.17243
## sample estimates:
## odds ratio 
##    27.9656
## NULL
check_supergene_presence_bias(pw.qlf.Brain_BB_v_Bb.TT$table)
## [1]   14  357   14 8142
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
##      [,1] [,2]
## [1,]   14   14
## [2,]  357 8142
##            [,1]       [,2]
## [1,]   1.218248   26.78175
## [2,] 369.781752 8129.21825
## [1] 1.902539e-32
## 
##  Fisher's Exact Test for Count Data
## 
## data:  matrix(out[1:4], nrow = 2)
## p-value = 1.576e-12
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   9.984269 51.955997
## sample estimates:
## odds ratio 
##   22.77735
## NULL
check_supergene_presence_bias(pw.qlf.Brain_BB_v_bb.TT$table)
## [1]   38  333   58 8098
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
##      [,1] [,2]
## [1,]   38   58
## [2,]  333 8098
##           [,1]       [,2]
## [1,]   4.17685   91.82315
## [2,] 366.82315 8064.17685
## [1] 6.043095e-65
## 
##  Fisher's Exact Test for Count Data
## 
## data:  matrix(out[1:4], nrow = 2)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  10.12979 24.77462
## sample estimates:
## odds ratio 
##   15.91616
## NULL
check_supergene_presence_bias(pw.qlf.Ovary_BB_v_Bb.TT$table)
## [1]    8  363   16 8140
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
##      [,1] [,2]
## [1,]    8   16
## [2,]  363 8140
##            [,1]       [,2]
## [1,]   1.044213   22.95579
## [2,] 369.955787 8133.04421
## [1] 3.172793e-12
## 
##  Fisher's Exact Test for Count Data
## 
## data:  matrix(out[1:4], nrow = 2)
## p-value = 4.744e-06
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   4.122905 27.963466
## sample estimates:
## odds ratio 
##   11.20021
## NULL
check_supergene_presence_bias(pw.qlf.Ovary_BB_v_bb.TT$table)
## [1]   40  331  117 8039
## [1] -2555
##      [,1] [,2]
## [1,]   40  117
## [2,]  331 8039
##           [,1]      [,2]
## [1,]   6.83089  150.1691
## [2,] 364.16911 8005.8309
## [1] 3.400547e-39
## 
##  Fisher's Exact Test for Count Data
## 
## data:  matrix(out[1:4], nrow = 2)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   5.549275 12.199496
## sample estimates:
## odds ratio 
##   8.298817
## NULL
## Generate adjusted logFC versions of the results tables so pw and glm tables are comparable. 
glm.QLF.BBvBb_adj <- glm.QLF.BBvBb_TT_sig
glm.QLF.BBvBb_adj$logFC <- -1*glm.QLF.BBvBb_TT_sig$logFC
glm.QLF.BBvbb_adj <- glm.QLF.BBvbb_TT_sig
glm.QLF.BBvbb_adj$logFC <- -1*glm.QLF.BBvbb_TT_sig$logFC

check_supergene_dir_bias(glm.QLF.BBvBb_adj)
## [1]  1 16  1 16
## [1] -16
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
##      [,1] [,2]
## [1,]    1    1
## [2,]   16   16
##      [,1] [,2]
## [1,]    1    1
## [2,]   16   16
## [1] 1
## 
##  Fisher's Exact Test for Count Data
## 
## data:  matrix(out[1:4], nrow = 2)
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   0.01200665 83.28721289
## sample estimates:
## odds ratio 
##          1
## NULL
check_supergene_dir_bias(glm.QLF.BBvbb_adj) 
## [1] 16 27 13 25
## [1] -52
##      [,1] [,2]
## [1,]   16   13
## [2,]   27   25
##          [,1]     [,2]
## [1,] 15.39506 13.60494
## [2,] 27.60494 24.39506
## [1] 0.7787573
## 
##  Fisher's Exact Test for Count Data
## 
## data:  matrix(out[1:4], nrow = 2)
## p-value = 0.8197
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.4165648 3.1443264
## sample estimates:
## odds ratio 
##   1.137764
## NULL
check_supergene_dir_bias(pw.qlf.Brain_BB_v_Bb.TT.sig)
## [1]  1 13  4 10
## [1] -14
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
##      [,1] [,2]
## [1,]    1    4
## [2,]   13   10
##      [,1] [,2]
## [1,]  2.5  2.5
## [2,] 11.5 11.5
## [1] 0.1387917
## 
##  Fisher's Exact Test for Count Data
## 
## data:  matrix(out[1:4], nrow = 2)
## p-value = 0.3259
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.003637868 2.484508917
## sample estimates:
## odds ratio 
##  0.2033741
## NULL
check_supergene_dir_bias(pw.qlf.Brain_BB_v_bb.TT.sig)
## [1] 17 21 38 20
## [1] -48
##      [,1] [,2]
## [1,]   17   38
## [2,]   21   20
##          [,1]     [,2]
## [1,] 21.77083 33.22917
## [2,] 16.22917 24.77083
## [1] 0.04412524
## 
##  Fisher's Exact Test for Count Data
## 
## data:  matrix(out[1:4], nrow = 2)
## p-value = 0.05811
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.1693863 1.0681989
## sample estimates:
## odds ratio 
##  0.4300187
## NULL
check_supergene_dir_bias(pw.qlf.Ovary_BB_v_Bb.TT.sig)
## [1]  1  7  0 16
## [1] -19
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
##      [,1] [,2]
## [1,]    1    0
## [2,]    7   16
##           [,1]       [,2]
## [1,] 0.3333333  0.6666667
## [2,] 7.6666667 15.3333333
## [1] 0.1485618
## 
##  Fisher's Exact Test for Count Data
## 
## data:  matrix(out[1:4], nrow = 2)
## p-value = 0.3333
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.05128188        Inf
## sample estimates:
## odds ratio 
##        Inf
## NULL
check_supergene_dir_bias(pw.qlf.Ovary_BB_v_bb.TT.sig) 
## [1]  19  21  17 100
## [1] -83
##      [,1] [,2]
## [1,]   19   17
## [2,]   21  100
##           [,1]     [,2]
## [1,]  9.171975 26.82803
## [2,] 30.828025 90.17197
## [1] 1.852027e-05
## 
##  Fisher's Exact Test for Count Data
## 
## data:  matrix(out[1:4], nrow = 2)
## p-value = 5.337e-05
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   2.192937 12.846506
## sample estimates:
## odds ratio 
##   5.250997
## NULL
## Take the results from above and visualize it as a stacked bar plot (Figure S2)
DE_dir_CT <- read.delim("~/Desktop/Sinv_Analyses/DE_dir_CT2.tsv")
DE_dir_CT$Comparison_f = factor(DE_dir_CT$Comparison, levels=c('Full BB vs. Bb','Brain BB vs. Bb','Ovary BB vs. Bb','Full BB vs. bb','Brain BB vs. bb','Ovary BB vs. bb'))
ggplot(data=DE_dir_CT, aes(x=Genome.Position, y=DEG_Ratio, fill=DE.Direction)) + 
  geom_bar(stat="identity") + 
  facet_grid(~Comparison_f) +   
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Allele-specific Expression Analysis

## Load SNP-level files in the individual GATK ASE Read Counter files (for all 16 Bb libraries)
setwd("~/Desktop/Sinv_Analyses/GATK_ASERC_mod/")
temp = list.files(pattern="*counts.csv")
temp2 <- sapply(strsplit(temp, split='_', fixed=TRUE), function(x) (x[1]))
myfiles = lapply(temp, read.delim)
# Note: Each File is saved as myfiles[[x]] as a data.frame

## merge first two columns into identifier column
for (i in 1:16){
  myfiles[[i]]$ID <- paste(myfiles[[i]]$contig,myfiles[[i]]$position,myfiles[[i]]$altAllele,sep = "~")
  colnames(myfiles[[i]]) <- paste(colnames(myfiles[[i]]), temp2[i], sep = ".")
  colnames(myfiles[[i]])[14] <- "ID"
}

## merge all data.frames by ID
merged <- Reduce(function(...) merge(..., by='ID', all.x=FALSE), myfiles)

## Load gene-level files (made using bedtools intersect) in the individual GATK ASE Read Counter files (for all 16 Bb libraries)
temp_gene = list.files(pattern="*genes.tsv")
temp2_gene <- sapply(strsplit(temp_gene, split='_', fixed=TRUE), function(x) (x[1]))
myfiles_gene = lapply(temp_gene, read.delim)
# Note: Each File is saved as myfiles[[x]] as a data.frame

## merge first two columns into identifier column
for (i in 1:16){
  colnames(myfiles_gene[[i]]) <- paste(colnames(myfiles_gene[[i]]), temp2_gene[i], sep = ".")
  colnames(myfiles_gene[[i]])[1] <- "gene"
}

## merge all data.frames by ID
merged_genes <- Reduce(function(...) merge(..., by='gene', all.x=FALSE), myfiles_gene)
pull_cols_genes <- grepl("refCount|altCount",colnames(merged_genes))
row.names(merged_genes) <- merged_genes$gene
x_genes <- merged_genes[,pull_cols_genes]

## Extract the specific columns that I want (refCount and altCount)
pull_cols <- grepl("refCount|altCount",colnames(merged))
row.names(merged) <- merged$ID
x <- merged[,pull_cols]

## Load in model matrix (Modified to analyze ASE)
adv_group_ASE <- read.delim("/Users/samarsenault/Desktop/Sinv_Analyses/Sinv_model_matrix6.tsv",sep = "\t")
head(adv_group_ASE)
##   Sample Individual_ID Colony_ID Tissue Social.Form Genotype
## 1  104AB          104A       104  Brain    Polygyne       Bb
## 2  104AB          104A       104  Brain    Polygyne       Bb
## 3  104AO          104A       104  Ovary    Polygyne       Bb
## 4  104AO          104A       104  Ovary    Polygyne       Bb
## 5  107AB          107A       107  Brain    Polygyne       Bb
## 6  107AB          107A       107  Brain    Polygyne       Bb
##              Alt_ID      Allele
## 1 Brain.Polygyne.Bb   Reference
## 2 Brain.Polygyne.Bb Alternative
## 3 Ovary.Polygyne.Bb   Reference
## 4 Ovary.Polygyne.Bb Alternative
## 5 Brain.Polygyne.Bb   Reference
## 6 Brain.Polygyne.Bb Alternative
Colony_ID <- factor(adv_group_ASE$Colony_ID)
Tissue <- factor(adv_group_ASE$Tissue)
Allele <- factor(adv_group_ASE$Allele)

## Describe the Experiment Design and define Contrasts
ASE_design <- model.matrix(~0+Colony_ID+Tissue*Allele,data=adv_group_ASE)

y_ASE <- DGEList(x,samples=ASE_design)

y_ASE <- estimateDisp(y_ASE,ASE_design)
y_ASE <- estimateGLMCommonDisp(y_ASE, ASE_design)
y_ASE <- estimateGLMTrendedDisp(y_ASE, ASE_design)
y_ASE <- estimateGLMTagwiseDisp(y_ASE, ASE_design)

## QLM Fit and DE analysis
fit_ASE <- glmQLFit(y_ASE,ASE_design)

## DE Analysis
QLF.ASE <- glmQLFTest(fit_ASE,coef="AlleleReference") 
QLF.ASE_TT <- topTags(QLF.ASE,n=Inf)
QLF.ASE_TT_sig <- topTags(QLF.ASE_TT,p.value = 0.01,n=Inf)$table
QLF.ASE_TT_sig$SNPs <- row.names(QLF.ASE_TT_sig)

make_volcano(QLF.ASE,"SB vs. Sb in SB/Sb individuals")
## [1] 78
## [1] 13
## [1] 6
## [1] 61
## Warning: Removed 1 rows containing missing values (geom_point).

## Note: Positive values in volcano plot indicate higher expression in reference. 


## Describe the Experiment Design and define Contrasts
y_gene<- DGEList(x_genes,samples=ASE_design)

y_gene <- estimateDisp(y_gene,ASE_design)
y_gene <- estimateGLMCommonDisp(y_gene, ASE_design)
y_gene <- estimateGLMTrendedDisp(y_gene, ASE_design)
y_gene <- estimateGLMTagwiseDisp(y_gene, ASE_design)

## QLM Fit and DE analysis
fit_gene <- glmQLFit(y_gene,ASE_design)

## DE Analysis
QLF.ASE.gene <- glmQLFTest(fit_gene,coef="AlleleReference") 
QLF.ASE.gene_TT <- topTags(QLF.ASE.gene,n=Inf)
QLF.ASE.gene_TT_sig <- topTags(QLF.ASE.gene_TT,p.value = 0.01,n=Inf)$table
QLF.ASE.gene_TT_all <- topTags(QLF.ASE.gene_TT,p.value = 1,n=Inf)$table

## Generate ASE Gene Volcano plot (Figure 4A)
make_volcano(QLF.ASE.gene,"Alternatively Spliced Genes") 
## [1] 10
## [1] 5
## [1] 4
## [1] 9

## Note: Positive values in volcano plot indicate higher expression in reference. 

## Break the topTags file into a better data.frame
QLF.ASE_TT_df <- QLF.ASE_TT$table
QLF.ASE_TT_df$chr <- sapply(strsplit(row.names(QLF.ASE_TT_df), split='~', fixed=TRUE), function(x) (x[1]))
QLF.ASE_TT_df$start <- sapply(strsplit(row.names(QLF.ASE_TT_df), split='~', fixed=TRUE), function(x) (x[2]))
QLF.ASE_TT_df$end <- sapply(strsplit(row.names(QLF.ASE_TT_df), split='~', fixed=TRUE), function(x) (x[2]))
## merge with linkage map. 
genetic_map <- read.delim("~/Desktop/Sinv_Analyses/NCBI_genome/Genetic_Mapping/genetic_map.txt", comment.char="#")
head (genetic_map)
##         scaffold start     end orientation  lg lg_position partial
## 1 NW_011798938.1     1  955721   uncertain lg1           1   FALSE
## 2 NW_011794913.1     1 1181834           + lg1      955822   FALSE
## 3 NW_011795398.1     1 1293734           - lg1     2137756   FALSE
## 4 NW_011801858.1     1 1483336           - lg1     3431590   FALSE
## 5 NW_011798146.1     1  451599           - lg1     4915026   FALSE
## 6 NW_011800891.1     1 3265217           + lg1     5366725   FALSE
## Map the SNPs (based on gnG genome) to the linkage map. 
QLF.ASE_TT_df_merge <- merge(QLF.ASE_TT_df,genetic_map,by.x="chr",by.y="scaffold")
for (i in 1:nrow(QLF.ASE_TT_df_merge)){
  if (QLF.ASE_TT_df_merge$orientation[i]=="-"){
    QLF.ASE_TT_df_merge$newstart[i] <- QLF.ASE_TT_df_merge$lg_position[i] + (QLF.ASE_TT_df_merge$end.y[i]-as.numeric(QLF.ASE_TT_df_merge$start.x[i]))
  } else {
    QLF.ASE_TT_df_merge$newstart[i] <- QLF.ASE_TT_df_merge$lg_position[i] + as.numeric(QLF.ASE_TT_df_merge$start.x[i]) - as.numeric(QLF.ASE_TT_df_merge$start.y[i])
  }
}
QLF.ASE_TT_df_merge$newend <- QLF.ASE_TT_df_merge$newstart
QLF.ASE_TT_df_merge <- QLF.ASE_TT_df_merge[(QLF.ASE_TT_df_merge$FDR<=0.01),]

## Add in LOC IDs for the ASE gene frames
QLF.ASE.gene_TT_sig_LOC <- convert_LOC(QLF.ASE.gene_TT_sig)
QLF.ASE.gene_TT_all_LOC <- convert_LOC(QLF.ASE.gene_TT_all)


## Make ASE DEG Upset Plot (Figure 4B)
##Filter Gene name lists down by which genes were analyzed in ASE and DE
ASE_background <- QLF.ASE.gene_TT_all_LOC$Row.names
Upset_backgroud <- intersect(Background.genes,ASE_background)
ASE_genes_b <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$FDR <= 0.01)&(QLF.ASE.gene_TT_all_LOC$logFC<0),c("Row.names")]
ASE_genes_B <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$FDR <= 0.01)&(QLF.ASE.gene_TT_all_LOC$logFC>0),c("Row.names")]
DE_genes_b <- row.names(glm.QLF.BBvBb_TT_sig[(glm.QLF.BBvBb_TT_sig$logFC>0),])
DE_genes_B <- row.names(glm.QLF.BBvBb_TT_sig[(glm.QLF.BBvBb_TT_sig$logFC<0),])

ASE_genes_b <- intersect(ASE_genes_b,Upset_backgroud)
ASE_genes_B <- intersect(ASE_genes_B,Upset_backgroud)
DE_genes_b <- intersect(DE_genes_b,Upset_backgroud)
DE_genes_B <- intersect(DE_genes_B,Upset_backgroud)

Overlap <- list(ASE_b=ASE_genes_b,
                ASE_B=ASE_genes_B,
                DE_SBSb=DE_genes_b,
                DE_SBSB=DE_genes_B
)
upset(fromList(Overlap),nsets=4,order.by = "freq",text.scale = 1.4)

## Extract overlap information

b_overlap <- ASE_genes_b[(ASE_genes_b %in% DE_genes_b)]
b_overlap_data <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$Row.names %in% b_overlap),]

B_overlap <- ASE_genes_B[ASE_genes_B %in% DE_genes_B]
QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$Row.names %in% B_overlap),]
##    Row.names    logFC   logCPM        F       PValue          FDR
## 61 gene11836 1.150108 11.61617 46.38109 1.162715e-07 3.226534e-06
##            Name
## 61 LOC105203076
##                                                     description
## 61 nucleolar protein 16 [Source:RefSeq mRNA;Acc:XM_011171831.1]
nonB_overlap <- ASE_genes_B[!(ASE_genes_B %in% DE_genes_B)]
dosage_comp_B <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$Row.names %in% nonB_overlap),]

nonb_overlap <- ASE_genes_b[!(ASE_genes_b %in% DE_genes_b)]
dosage_comp_b <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$Row.names %in% nonb_overlap),]

Visualize Supergene Localization

## Set some plotting parameters for the karyoploteR
plot.params <- getDefaultPlotParams(plot.type = 1)
plot.params$data1height=300
plot.params$data1outmargin = 20
plot.params$data1inmargin = 5
plot.params$ideogramheight = 10
col.over <- "#FFBD07AA" #Yellow
col.under <- "#00A6EDAA" #Blue
col.insig <- "#7F7F7F"

## Begin Creating the tracks for Figure 5
kp <- plotKaryotype(genome = Chr_sizes_GR, cytobands = gene_bed_GR,chromosomes = c("lg16"),plot.params = plot.params)
## Warning in ideogram.plotter(kp, ...): No 'gieStain' column found in
## cytobands. Using 'gpos50' (gray) for all of them
kpAddBaseNumbers(kp,tick.dist = 1000000,add.units = TRUE)

## Add Supergene
kpPlotRegions(kp,data = supergene_GR,col="darkgreen", r0=0,r1=0.01)

## Add in various inverions from Huang et al.  (from 0 to 0.1)
gr <- GRanges(seqnames = "lg16", strand = c("+", "+", "+"),
              ranges = IRanges(start = c(7948943,8793193,18097796), end = c(8793193,18097796,18098299)))
values(gr) <- DataFrame(labels = c("Inversion A","Inversion B","Inversion C"))
kpPlotRegions(kp,data = gr[1],col="purple", r0=0.02,r1=0.03)
kpPlotRegions(kp,data = gr[2],col="orange", r0=0.02,r1=0.03)
kpPlotRegions(kp,data = gr[3],col="red", r0=0.02,r1=0.03)

## Plot Gene density across Lg16
kpPlotDensity(kp, gene_bed_GR, window.size = 500000, col="#ddaacc",r0 = 0.04,r1 = 0.09)

## Core BBvbb
QLF.BBvbb_adj.merge2 <- merge(glm.QLF.BBvbb_TT_sig,Sinv_annot_lg,by.x=0,by.y="X4")
QLF.BBvbb_adj.merge2$logFC <- -1*QLF.BBvbb_adj.merge2$logFC
QLF.BBvbb_adj.merge <- QLF.BBvbb_adj.merge2[QLF.BBvbb_adj.merge2$X1=="lg16",]
DEG_GR <- makeGRangesFromDataFrame(QLF.BBvbb_adj.merge, seqnames.field = "X1",start.field = "X2",end.field = "X3",keep.extra.columns = TRUE)

fc.ymax=ceiling(max(abs(DEG_GR$logFC)))
fc.ymin=-fc.ymax
cex.val <- -log10(DEG_GR$FDR)/15 + 0.7
sign.col <- rep(col.insig, length(DEG_GR))
sign.col[(DEG_GR$logFC<0)&(DEG_GR$FDR<=0.01)] <- col.under
sign.col[(DEG_GR$logFC>0)&(DEG_GR$FDR<=0.01)] <- col.over
kpPoints(kp, data=DEG_GR, y=DEG_GR$logFC, ymax=fc.ymax, ymin=fc.ymin,cex=cex.val, col=sign.col,r0 = 0.14, r1 = 0.29)
kpAxis(kp, ymax=fc.ymax, ymin=fc.ymin,side = 2,cex=0.5,r0 = 0.14, r1 = 0.29)

DEG_density_B <- generate_heatmap_frame(QLF.BBvbb_adj.merge[QLF.BBvbb_adj.merge$logFC>0,]$Row.names)
DEG_density_b <- generate_heatmap_frame(QLF.BBvbb_adj.merge[QLF.BBvbb_adj.merge$logFC<0,]$Row.names)

colfunc <- colorRampPalette(c("lightcyan1", "red"))
kpHeatmap(kp,data=DEG_density_B,r0=0.3,r1=0.33,colors = colfunc(5))
kpHeatmap(kp,data=DEG_density_b,r0=0.1+0,r1=0.13,colors = colfunc(5))

## Core BBvBb
QLF.BBvBb_adj.merge2 <- merge(glm.QLF.BBvBb_TT_sig,Sinv_annot_lg,by.x=0,by.y="X4")
QLF.BBvBb_adj.merge <- QLF.BBvBb_adj.merge2[QLF.BBvBb_adj.merge2$X1=="lg16",]
DEG_GR <- makeGRangesFromDataFrame(QLF.BBvBb_adj.merge, seqnames.field = "X1",start.field = "X2",end.field = "X3",keep.extra.columns = TRUE)

fc.ymax=ceiling(max(abs(DEG_GR$logFC)))
fc.ymin=-fc.ymax
cex.val <- -log10(DEG_GR$FDR)/15 + 0.7
sign.col <- rep(col.insig, length(DEG_GR))
sign.col[(DEG_GR$logFC<0)&(DEG_GR$FDR<=0.01)] <- col.under
sign.col[(DEG_GR$logFC>0)&(DEG_GR$FDR<=0.01)] <- col.over
kpPoints(kp, data=DEG_GR, y=DEG_GR$logFC, ymax=fc.ymax, ymin=fc.ymin,cex=cex.val, col=sign.col,r0 = 0.39, r1 = 0.54)
kpAxis(kp, ymax=fc.ymax, ymin=fc.ymin,side = 2,cex=0.5,r0 = 0.38, r1 = 0.54)

DEG_density_B <- generate_heatmap_frame(QLF.BBvBb_adj.merge[QLF.BBvBb_adj.merge$logFC>0,]$Row.names)
DEG_density_b <- generate_heatmap_frame(QLF.BBvBb_adj.merge[QLF.BBvBb_adj.merge$logFC<0,]$Row.names)

colfunc <- colorRampPalette(c("lightcyan1", "red"))
kpHeatmap(kp,data=DEG_density_b,r0=0.35,r1=0.38,colors = colfunc(5))
kpHeatmap(kp,data=DEG_density_B,r0=0.55,r1=0.58,colors = colfunc(5))

## Add in ASE Heatmap and B vs. b dots 0.6-0.8

ASE_GR <- makeGRangesFromDataFrame(QLF.ASE_TT_df_merge, seqnames.field = "lg",start.field = "newstart",end.field = "newend",keep.extra.columns = TRUE)

fc.ymax=ceiling(max(abs(ASE_GR$logFC)))
fc.ymin=-fc.ymax
cex.val <- -log10(ASE_GR$FDR)/15 + 0.7
sign.col <- rep(col.insig, length(ASE_GR))
col.over2 <- rgb(125, 125, 0, max = 255, alpha = 100) #Yellow
col.under2 <- rgb(0, 0, 255, max = 255, alpha = 100) #Blue
sign.col[(ASE_GR$logFC<0)&(ASE_GR$FDR<=0.01)] <- col.under2
sign.col[(ASE_GR$logFC>0)&(ASE_GR$FDR<=0.01)] <- col.over2
kpPoints(kp, data=ASE_GR, y=ASE_GR$logFC, ymax=fc.ymax, ymin=fc.ymin,cex=cex.val, col=sign.col,r0 = 0.64, r1 = 0.79)
kpAxis(kp, ymax=fc.ymax, ymin=fc.ymin,side = 2,cex=0.5,r0 = 0.63, r1 = 0.79)

SNP_density_B <- generate_heatmap_frame_SNP(QLF.ASE_TT_df_merge[(QLF.ASE_TT_df_merge$logFC>0),])
SNP_density_b <- generate_heatmap_frame_SNP(QLF.ASE_TT_df_merge[(QLF.ASE_TT_df_merge$logFC<0),])

colfunc <- colorRampPalette(c("lightcyan1", "red"))
kpHeatmap(kp,data=SNP_density_B,r0=0.80,r1=0.83,colors = colfunc(5))
kpHeatmap(kp,data=SNP_density_b,r0=0.60,r1=0.63,colors = colfunc(5))

## Perform Run's Test on DEGs, Testing for non-uniformity in significance

## Did some merging of data frames using shell scripting and generated an excel spreadsheet of the stats for genes in the supergene.
Supergene_genes <- read_excel("~/Desktop/Sinv_Analyses/Supergene_genes.xlsx",col_types = "guess")
for (i in c(5,6,13:25)){
  Supergene_genes[,i] <- as.numeric(unlist(Supergene_genes[,i]))
}
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion
runs.test(Supergene_genes$Core_Bb_FDR[!is.na(Supergene_genes$Core_Bb_FDR)],plot=T,threshold = 0.01)

## 
##  Runs Test
## 
## data:  Supergene_genes$Core_Bb_FDR[!is.na(Supergene_genes$Core_Bb_FDR)]
## statistic = -1.4578, runs = 31, n1 = 351, n2 = 17, n = 368,
## p-value = 0.1449
## alternative hypothesis: nonrandomness
runs.test(Supergene_genes$Core_bb_FDR[!is.na(Supergene_genes$Core_bb_FDR)],plot=T,threshold = 0.01)

## 
##  Runs Test
## 
## data:  Supergene_genes$Core_bb_FDR[!is.na(Supergene_genes$Core_bb_FDR)]
## statistic = -0.4954, runs = 75, n1 = 325, n2 = 43, n = 368,
## p-value = 0.6203
## alternative hypothesis: nonrandomness
## Testing for non-uniformity in direcitonality among DEGs
Bb_DEGs <- Supergene_genes[Supergene_genes$Core_Bb_FDR<=0.01,c("start","Core_Bb_FDR","Core_Bb_logFC")]
Bb_DEGs$binary <- (Bb_DEGs$Core_Bb_logFC > 0)*1
bb_DEGs <- Supergene_genes[Supergene_genes$Core_bb_FDR<=0.01,c("start","Core_bb_FDR","Core_bb_logFC")]
bb_DEGs$binary <- (bb_DEGs$Core_bb_logFC > 0)*1

## Testing for nonuniformity of ASE GENES
ASE_temp <- Supergene_genes[,c("start","ASE_FDR.","ASE_logFC")]
ASE_temp <- ASE_temp[!is.na(ASE_temp[,2]),]
ASE_genes <- ASE_temp[(ASE_temp$ASE_FDR.<=0.01),]
ASE_genes$binary <- (ASE_genes$ASE_logFC > 0)*1

runs.test(ASE_genes$ASE_logFC[9:21],plot=T,threshold = 0) ## This subset is significant

## 
##  Runs Test
## 
## data:  ASE_genes$ASE_logFC[9:21]
## statistic = -2.0185, runs = 4, n1 = 6, n2 = 7, n = 13, p-value =
## 0.04354
## alternative hypothesis: nonrandomness
runs.test(ASE_genes$ASE_logFC,plot=T,threshold = 0)

## 
##  Runs Test
## 
## data:  ASE_genes$ASE_logFC
## statistic = 0.027658, runs = 15, n1 = 13, n2 = 15, n = 28, p-value
## = 0.9779
## alternative hypothesis: nonrandomness
runs.test(Bb_DEGs$Core_Bb_logFC,plot=T,threshold = 0)

## 
##  Runs Test
## 
## data:  Bb_DEGs$Core_Bb_logFC
## statistic = 0.36515, runs = 3, n1 = 16, n2 = 1, n = 17, p-value =
## 0.715
## alternative hypothesis: nonrandomness
runs.test(bb_DEGs$Core_bb_logFC,plot=T,threshold = 0)

## 
##  Runs Test
## 
## data:  bb_DEGs$Core_bb_logFC
## statistic = -2.3469, runs = 14, n1 = 27, n2 = 16, n = 43, p-value
## = 0.01893
## alternative hypothesis: nonrandomness
## Runs ASE by SNP

ASE_logFC <- QLF.ASE_TT_df_merge[order(QLF.ASE_TT_df_merge$newend),c("logFC")]
runs.test(ASE_logFC,plot=T,threshold = 0)

## 
##  Runs Test
## 
## data:  ASE_logFC
## statistic = -5.7485, runs = 43, n1 = 67, n2 = 91, n = 158, p-value
## = 9.004e-09
## alternative hypothesis: nonrandomness

Generate Polar Plot

## Merge with linkage mapped annotation to determine supergene vs. nonsupergene genes
glm.QLF.all_TT_sig_merge <- merge(glm.QLF.all_TT_sig,Sinv_annot_lg,by.x=0,by.y="X4",all.x=TRUE)

supergene_genes <- glm.QLF.all_TT_sig_merge[((glm.QLF.all_TT_sig_merge$X1=="lg16")&(glm.QLF.all_TT_sig_merge$X2>=7311525)&(glm.QLF.all_TT_sig_merge$X3<=18123987)),c("Row.names")]
supergene_genes <- supergene_genes[!is.na(supergene_genes)]
nonsuper_genes <- glm.QLF.all_TT_sig_merge[((grepl(pattern = "lg",x = glm.QLF.all_TT_sig_merge$X1))&(glm.QLF.all_TT_sig_merge$Row.names %!in% supergene_genes)),c("Row.names")]

## Generate Polar Plot with only the genes in the supergene (Figure S3,B,C)
NCBI_supergene_polar <- make_polar_DEG(glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% supergene_genes),],15)
NCBI_supergene_polar
## Warning: Removed 1 rows containing missing values (geom_point).

NCBI_nonsupergene_polar <- make_polar_DEG(glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% nonsuper_genes),],10)
NCBI_nonsupergene_polar

## Extract the angles of expression using an arctangent and then compare the radial distributions using a watson's two test
nonsuper_angle <- atan2(glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% nonsuper_genes),c("logFC.GenotypeBb")],glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% nonsuper_genes),c("logFC.Genotypebb")])
supergene_angle <- atan2(glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% supergene_genes),c("logFC.GenotypeBb")],glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% supergene_genes),c("logFC.Genotypebb")])
watson.two(nonsuper_angle, supergene_angle, alpha=0, plot=TRUE)

## 
##        Watson's Two-Sample Test of Homogeneity 
##  
## Test Statistic: 0.2387 
## 0.01 < P-value < 0.05 
## 

Odorant Binding Protein Analysis

## Import the OBP-aligned RNA-seq Data
x1_OBP <- read_delim("~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all", 
                  "\t", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   GeneID = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 52 parsing failures.
## row col   expected     actual                                                     file
##   1  -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
##   2  -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
##   3  -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
##   4  -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
##   5  -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
## ... ... .......... .......... ........................................................
## See problems(...) for more details.
x_OBP <- data.frame(x1_OBP[1:52,2:64],row.names = x1_OBP$GeneID[1:52])

## Generate data.frame for pairwise OBP analyses
y_OBP <- DGEList(x_OBP,group = adv_group$Alt_ID)

keep_OBP <- rowSums(cpm(y_OBP)>10/9) >= 8 ## NEED TO EDIT!!!
y_OBP <- y_OBP[keep_OBP, , keep.lib.sizes=FALSE]

## Normalize samples by library size
y_OBP <- calcNormFactors(y_OBP)
obp.Background.genes <- row.names(y_OBP$counts)


## Begin Computing Differential Expression
y_OBP <- estimateDisp(y_OBP,design_pw)
y_OBP <- estimateGLMCommonDisp(y_OBP, design_pw)
y_OBP <- estimateGLMTrendedDisp(y_OBP, design_pw)
y_OBP <- estimateGLMTagwiseDisp(y_OBP, design_pw)

## QLM Fit and DE analysis
## Note: For more conservative estimates, use glmQLFTest vs. glmLRT
fit_OBP <- glmQLFit(y_OBP,design_pw)



obp.qlf.Brain_BB_v_Bb <- glmQLFTest(fit_OBP, contrast=my.contrasts[,"Brain_BB_v_Bb"])
obp.qlf.Brain_BB_v_Bb.TT <- topTags(obp.qlf.Brain_BB_v_Bb,p.value = 1,n=100000)
obp.qlf.Brain_BB_v_Bb.TT.sig <- topTags(obp.qlf.Brain_BB_v_Bb,p.value = 0.01,n=100000)$table
obp.qlf.Brain_BB_v_Bb.TT.names <- row.names(obp.qlf.Brain_BB_v_Bb.TT$table)

obp.qlf.Ovary_BB_v_Bb <- glmQLFTest(fit_OBP, contrast=my.contrasts[,"Ovary_BB_v_Bb"])
obp.qlf.Ovary_BB_v_Bb.TT <- topTags(obp.qlf.Ovary_BB_v_Bb,p.value = 1,n=100000)
obp.qlf.Ovary_BB_v_Bb.TT.sig <- topTags(obp.qlf.Ovary_BB_v_Bb,p.value = 0.01,n=100000)$table
obp.qlf.Ovary_BB_v_Bb.TT.names <- row.names(obp.qlf.Ovary_BB_v_Bb.TT$table)

obp.qlf.Brain_BB_v_bb <- glmQLFTest(fit_OBP, contrast=my.contrasts[,"Brain_BB_v_bb"])
obp.qlf.Brain_BB_v_bb.TT <- topTags(obp.qlf.Brain_BB_v_bb,p.value = 1,n=100000)
obp.qlf.Brain_BB_v_bb.TT.sig <- topTags(obp.qlf.Brain_BB_v_bb,p.value = 0.01,n=100000)$table
obp.qlf.Brain_BB_v_bb.TT.names <- row.names(obp.qlf.Brain_BB_v_bb.TT$table)

obp.qlf.Ovary_BB_v_bb <- glmQLFTest(fit_OBP, contrast=my.contrasts[,"Ovary_BB_v_bb"])
obp.qlf.Ovary_BB_v_bb.TT <- topTags(obp.qlf.Ovary_BB_v_bb,p.value = 1,n=100000)
obp.qlf.Ovary_BB_v_bb.TT.sig <- topTags(obp.qlf.Ovary_BB_v_bb,p.value = 0.01,n=100000)$table
obp.qlf.Ovary_BB_v_bb.TT.names <- row.names(obp.qlf.Ovary_BB_v_bb.TT$table)


## Now the GLM version of the analysis
y_obp_glm <- DGEList(x_OBP,samples=glm_design)

## Filter out low count genes
y_obp_glm <- y_obp_glm[keep_OBP, , keep.lib.sizes=FALSE]

### Normalize samples by library size
y_obp_glm <- calcNormFactors(y_obp_glm)

## Begin Computing Differential Expression
y_obp_glm <- estimateDisp(y_obp_glm,glm_design)
y_obp_glm <- estimateGLMCommonDisp(y_obp_glm, glm_design)
y_obp_glm <- estimateGLMTrendedDisp(y_obp_glm, glm_design)
y_obp_glm <- estimateGLMTagwiseDisp(y_obp_glm, glm_design)

## QLM Fit and DE analysis
## Note: For more conservative estimates, use glmQLFTest vs. glmLRT
fit_obp_glm <- glmQLFit(y_obp_glm,glm_design)

## Compute Differential expression based on number of supergene alleles
## DE Analysis for Paper
obp.QLF.BBvBb <- glmQLFTest(fit_obp_glm,coef="GenotypeBb") 
obp.QLF.BBvBb_TT <- topTags(obp.QLF.BBvBb,n=Inf)
obp.QLF.BBvBb_TT_sig <- topTags(obp.QLF.BBvBb_TT,p.value = 0.01,n=Inf)$table

obp.QLF.BBvbb <- glmQLFTest(fit_obp_glm,coef="Genotypebb") 
obp.QLF.BBvbb_TT <- topTags(obp.QLF.BBvbb,n=Inf)
obp.QLF.BBvbb_TT_sig <- topTags(obp.QLF.BBvbb_TT,p.value = 0.01,n=Inf)$table

## Compute differential expression based on social form
obp.QLF.SF <- glmQLFTest(fit_obp_glm,contrast=makeContrasts(Monogyne - Polygyne, levels=glm_design))
obp.QLF.SF_TT <- topTags(obp.QLF.SF,n=Inf)
obp.QLF.SF_TT_sig <- topTags(obp.QLF.SF_TT,p.value = 0.01,n=Inf)$table


## Reorder the dataframes
obp.qlf.Brain_BB_v_Bb_order <- obp.qlf.Brain_BB_v_Bb.TT$table[ order(row.names(obp.qlf.Brain_BB_v_Bb.TT$table)), ]
obp.qlf.Ovary_BB_v_Bb_order <- obp.qlf.Ovary_BB_v_Bb.TT$table[ order(row.names(obp.qlf.Ovary_BB_v_Bb.TT$table)), ]
obp.qlf.Brain_BB_v_bb_order <- obp.qlf.Brain_BB_v_bb.TT$table[ order(row.names(obp.qlf.Brain_BB_v_bb.TT$table)), ]
obp.qlf.Ovary_BB_v_bb_order <- obp.qlf.Ovary_BB_v_bb.TT$table[ order(row.names(obp.qlf.Ovary_BB_v_bb.TT$table)), ]
obp.qlf.Core_BB_v_Bb_order <- obp.QLF.BBvBb_TT$table[ order(row.names(obp.QLF.BBvBb_TT$table)), ]
obp.qlf.Core_BB_v_bb_order <- obp.QLF.BBvbb_TT$table[ order(row.names(obp.QLF.BBvbb_TT$table)), ]
obp.qlf.SF_order <- obp.QLF.SF_TT$table[ order(row.names(obp.QLF.SF_TT$table)), ]


#### Generate FDR heatmap (Figure S4B)
DEG_heatmap <- data.frame(rep(x = 0, 27))
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Brain_BB_v_Bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Ovary_BB_v_Bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Brain_BB_v_bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Ovary_BB_v_bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Core_BB_v_Bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Core_BB_v_bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.SF_order$FDR)
row.names(DEG_heatmap) <- row.names(obp.qlf.Core_BB_v_bb_order)
DEG_heatmap <- DEG_heatmap[,2:8]
colnames(DEG_heatmap) <- c("Brain_BB_v_Bb","Ovary_BB_v_Bb","Brain_BB_v_bb","Ovary_BB_v_bb","Core_BB_v_Bb","Core_BB_v_bb","Core_SocialForm")

# create color palette
col.pal <- brewer.pal(9,"YlOrRd")
hm.parameters <- list(DEG_heatmap, 
                      color = col.pal,
                      scale = "none",
                      kmeans_k = NA,
                      show_rownames = T, show_colnames = T,breaks=c(0,0.001,0.01,0.05,0.1,0.2,0.5,1),display_numbers=TRUE,number_format="%.3f",
                      main = "Solenopsis invicta OBP DEG Significance",
                      clustering_method = "complete",
                      cluster_rows = FALSE, cluster_cols = FALSE,
                      clustering_distance_rows = "correlation", 
                      clustering_distance_cols = "correlation")
do.call("pheatmap", hm.parameters)

#### Generate LogFC heatmap (Figure S4A)
DEG_heatmap <- data.frame(rep(x = 0, 27))
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Brain_BB_v_Bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Ovary_BB_v_Bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Brain_BB_v_bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Ovary_BB_v_bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,-1*obp.qlf.Core_BB_v_Bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,-1*obp.qlf.Core_BB_v_bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.SF_order$logFC)
row.names(DEG_heatmap) <- row.names(obp.qlf.Core_BB_v_bb_order)
DEG_heatmap <- DEG_heatmap[,2:8]
colnames(DEG_heatmap) <- c("Brain_BB_v_Bb","Ovary_BB_v_Bb","Brain_BB_v_bb","Ovary_BB_v_bb","Core_BB_v_Bb","Core_BB_v_bb","Core_SocialForm")

# create color palette
col.pal <- brewer.pal(9,"RdBu")
hm.parameters <- list(DEG_heatmap, 
                      color = col.pal,
                      scale = "none",
                      kmeans_k = NA,
                      show_rownames = T, show_colnames = T,display_numbers=TRUE,number_format="%.3f",
                      main = "Solenopsis invicta OBP DEG logFC",
                      clustering_method = "complete",
                      cluster_rows = FALSE, cluster_cols = FALSE,
                      clustering_distance_rows = "correlation", 
                      clustering_distance_cols = "correlation")
do.call("pheatmap", hm.parameters)

Present Session Info

save.image(file = "Molecular_Ecology_Markdown.RData")
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS  10.14.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] rentrez_1.2.2        rtracklayer_1.42.2   readxl_1.3.1        
##  [4] randtests_1.0        RColorBrewer_1.1-2   CircStats_0.2-6     
##  [7] boot_1.3-23          MASS_7.3-51.4        pheatmap_1.0.12     
## [10] eulerr_6.0.0         data.table_1.12.2    topGO_2.34.0        
## [13] SparseM_1.77         GO.db_3.7.0          AnnotationDbi_1.44.0
## [16] Biobase_2.42.0       graph_1.60.0         ggplot2_3.2.1       
## [19] cowplot_1.0.0        karyoploteR_1.8.8    regioneR_1.14.0     
## [22] GenomicRanges_1.34.0 GenomeInfoDb_1.18.2  IRanges_2.16.0      
## [25] S4Vectors_0.20.1     BiocGenerics_0.28.0  gridExtra_2.3       
## [28] magick_2.2           readr_1.3.1          UpSetR_1.4.0        
## [31] edgeR_3.24.3         limma_3.38.3        
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_1.4-1            biovizBase_1.30.1          
##  [3] htmlTable_1.13.2            XVector_0.22.0             
##  [5] base64enc_0.1-3             dichromat_2.0-0            
##  [7] rstudioapi_0.10             bit64_0.9-7                
##  [9] fansi_0.4.0                 splines_3.5.1              
## [11] knitr_1.25                  polyclip_1.10-0            
## [13] zeallot_0.1.0               jsonlite_1.6               
## [15] Formula_1.2-3               Rsamtools_1.34.1           
## [17] cluster_2.1.0               compiler_3.5.1             
## [19] httr_1.4.1                  backports_1.1.4            
## [21] assertthat_0.2.1            Matrix_1.2-17              
## [23] lazyeval_0.2.2              cli_1.1.0                  
## [25] formatR_1.7                 acepack_1.4.1              
## [27] htmltools_0.3.6             prettyunits_1.0.2          
## [29] tools_3.5.1                 gtable_0.3.0               
## [31] glue_1.3.1                  GenomeInfoDbData_1.2.0     
## [33] reshape2_1.4.3              dplyr_0.8.3                
## [35] Rcpp_1.0.2                  cellranger_1.1.0           
## [37] vctrs_0.2.0                 Biostrings_2.50.2          
## [39] polylabelr_0.1.0            xfun_0.10                  
## [41] stringr_1.4.0               ensembldb_2.6.8            
## [43] XML_3.98-1.20               zlibbioc_1.28.0            
## [45] scales_1.0.0                BSgenome_1.50.0            
## [47] VariantAnnotation_1.28.13   hms_0.5.1                  
## [49] ProtGenerics_1.14.0         SummarizedExperiment_1.12.0
## [51] AnnotationFilter_1.6.0      yaml_2.2.0                 
## [53] curl_4.2                    memoise_1.1.0              
## [55] biomaRt_2.38.0              rpart_4.1-15               
## [57] latticeExtra_0.6-28         stringi_1.4.3              
## [59] RSQLite_2.1.2               checkmate_1.9.4            
## [61] GenomicFeatures_1.34.8      BiocParallel_1.16.6        
## [63] rlang_0.4.0                 pkgconfig_2.0.3            
## [65] matrixStats_0.55.0          bitops_1.0-6               
## [67] evaluate_0.14               lattice_0.20-38            
## [69] purrr_0.3.2                 labeling_0.3               
## [71] GenomicAlignments_1.18.1    htmlwidgets_1.3            
## [73] bit_1.1-14                  tidyselect_0.2.5           
## [75] plyr_1.8.4                  magrittr_1.5               
## [77] R6_2.4.0                    Hmisc_4.2-0                
## [79] DelayedArray_0.8.0          DBI_1.0.0                  
## [81] pillar_1.4.2                foreign_0.8-72             
## [83] withr_2.1.2                 survival_2.44-1.1          
## [85] RCurl_1.95-4.12             nnet_7.3-12                
## [87] tibble_2.1.3                crayon_1.3.4               
## [89] utf8_1.1.4                  rmarkdown_1.16             
## [91] bamsignals_1.14.0           progress_1.2.2             
## [93] locfit_1.5-9.1              grid_3.5.1                 
## [95] blob_1.2.0                  digest_0.6.21              
## [97] bezier_1.1.2                munsell_0.5.0